Seurat pipeline of singlet GEX data - without doublets
DefaultAssay(seu) <- "RNA"
seu <- NormalizeData(seu, normalization.method = "LogNormalize", scale.factor = 10000)
seu <- FindVariableFeatures(seu, selection.method = "vst", nfeatures = 2000)
# Identify the 10 most highly variable genes
top10 <- head(VariableFeatures(seu), 10)
# plot variable features with and without labels
plot1 <- VariableFeaturePlot(seu)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
## When using repel, set xnudge and ynudge to 0 for optimal results
plot1 + plot2
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 611 rows containing missing values (geom_point).
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 611 rows containing missing values (geom_point).

all.genes <- rownames(seu)
seu <- ScaleData(seu, features = all.genes)
## Centering and scaling data matrix
seu <- RunPCA(seu, features = VariableFeatures(object = seu))
## PC_ 1
## Positive: loxl5b, col9a3, col9a1b, FQ323119.1, zmp:0000000760, col5a3a, znf385b, col2a1a, cmn, ntd5
## rcn3, col11a1a, tdgf1, col9a2, col2a1b, paqr4b, col8a1a, p4ha1b, loxl3b, CU655820.1
## plod1a, slc38a8b, lama5, ablim2, cnmd, caskin1, shha, emilin3a, znf385a, robo4
## Negative: cntfr, nrp2a, pdgfra, ednrab, mdkb, pax3a, cdh6, kif21b, rxraa, col18a1a
## col15a1b, alcama, CU467961.1, si:dkey-250k10.4, foxd3, kcnab1a, zic2b, itga9, crlf1a, sdk2b
## zgc:174888, lima1a, pmp22a, msx1b, cdh11, epha4b, celf2, dusp5, crestin, gphna
## PC_ 2
## Positive: col4a5, actb2, cdon, plpp3, slit2, tram1, cdh6, ednrab, actn1, qkib
## her9, plod1a, p4ha1b, rrbp1b, pmp22a, bnc2, si:dkey-22o22.2, ccn1, CU467961.1, pdgfra
## cd151l, caskin1, ugcg, mdkb, kaznb, kdelr2b, kif21b, sec61a1l, creb5b, loxl5b
## Negative: txlnba, obsl1a, actc1b, nexn, desma, tnni2b.1, tpma, smyhc1, neb, chrng
## tmem38a, aldoab, srl, ttn.2, xirp1, acta1b, tnnc2, ryr1b, hsp90aa1.1, musk
## asb12b, ttn.1, trdn, BX571827.3, klhl31, chrnd, cacng1a, obscnb, ENSDARG00000112015, tnnt2c
## PC_ 3
## Positive: spaca4l, krt4, cyt1, cyt1l, zgc:174938, cd9b, krt92, si:dkey-87o1.2, epcam, krt5
## cldne, si:ch211-195b11.3, krt17, cldn7b, agr1, cldnb, krt91, anxa1c, gpa33a, oclna
## si:cabz01007794.1, ppl, spint2, evplb, eppk1, gbgt1l4, zgc:153665, cfl1l, krt97, si:ch73-347e22.8
## Negative: epha4a, serpinh1b, foxd3, pax3a, lhfpl6, zic2b, mtss1lb, rxraa, gdf11, hspb1
## si:dkey-22o22.2, nav2a, gphna, qkib, itga9, kif21b, alg6, lpar1, foxd3-mCherry, kcnab1a
## cdon, cdh6, pdzd2, caskin1, slc2a11b, tspan36, BX663503.3, hoxc3a, ENSDARG00000024966, col15a1b
## PC_ 4
## Positive: nrp2a, tns2a, ednrab, actb2, pcdh9, tspan36, f3b, pmp22a, bnc2, flna
## si:dkey-250k10.4, qkib, slc2a11b, alcama, phldb2a, CU138533.1, pdgfra, trpm1b, kif21b, dusp5
## tgfbr3, cax1, zgc:174888, crlf1a, aox5, slc2a15b, slc45a2, alx4a, bace2, crestin
## Negative: hoxc3a, mllt3, hoxc6b, fn1b, kif26ab, msgn1, pcdh8, large2, tbx16, her1
## hspb1, tbx16l, kank1a, ENSDARG00000104264, apoc1, rbm38, aopep, lpar1, draxin, adamts18
## samd10b, ism1, dlc, cyp26a1, epha4a, dld, tuba8l2, wnt8a.1, esrrga, gdf11
## PC_ 5
## Positive: aox5, slc2a15b, cax1, slc45a2, CABZ01021592.1, trpm1b, gch2, fn1b, gpr143, si:dkey-106n21.1
## akr1b1.1, lrmda, pcdh8, syngr1a, tspan36, bace2, rab32a, pts, lpar1, pcdh9
## CU138533.1, her1, slc2a11b, cx30.3, samd10b, mocs1, slc2a15a, pax7b, tyr, tfeb
## Negative: ncam1a, plch1, epb41a, rfx4, negr1, ctnnd2b, crb2a, cep131, nfasca, ctnna2
## elavl3, brsk2b, rnf220a, chl1a, nbeaa, col18a1a, adgrv1, gadd45gb.1, epha7, dlb
## ebf2, dacha, ptprnb, epha4b, ek1, rassf7a, tmem108, inavaa, crb1, ptprna
# Examine and visualize PCA results a few different ways
print(seu[["pca"]], dims = 1:5, nfeatures = 5)
## PC_ 1
## Positive: loxl5b, col9a3, col9a1b, FQ323119.1, zmp:0000000760
## Negative: cntfr, nrp2a, pdgfra, ednrab, mdkb
## PC_ 2
## Positive: col4a5, actb2, cdon, plpp3, slit2
## Negative: txlnba, obsl1a, actc1b, nexn, desma
## PC_ 3
## Positive: spaca4l, krt4, cyt1, cyt1l, zgc:174938
## Negative: epha4a, serpinh1b, foxd3, pax3a, lhfpl6
## PC_ 4
## Positive: nrp2a, tns2a, ednrab, actb2, pcdh9
## Negative: hoxc3a, mllt3, hoxc6b, fn1b, kif26ab
## PC_ 5
## Positive: aox5, slc2a15b, cax1, slc45a2, CABZ01021592.1
## Negative: ncam1a, plch1, epb41a, rfx4, negr1
DimPlot(seu, reduction = "pca", group.by = "sample")

ElbowPlot(seu, ndims = 50)

seu <- FindNeighbors(seu, dims = 1:25)
## Computing nearest neighbor graph
## Computing SNN
seu <- FindClusters(seu, resolution = 0.6)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 64340
## Number of edges: 2437107
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9407
## Number of communities: 30
## Elapsed time: 19 seconds
## 1 singletons identified. 29 final clusters.
# If you haven't installed UMAP, you can do so via reticulate::py_install(packages =
# 'umap-learn')
seu <- RunUMAP(seu, dims = 1:25)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 15:40:55 UMAP embedding parameters a = 0.9922 b = 1.112
## 15:40:55 Read 64340 rows and found 25 numeric columns
## 15:40:55 Using Annoy for neighbor search, n_neighbors = 30
## 15:40:55 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 15:41:09 Writing NN index file to temp file /Filers/scratch/zhu/4891762//Rtmpbudq8Q/file3d8169a6a35d
## 15:41:09 Searching Annoy index using 1 thread, search_k = 3000
## 15:41:36 Annoy recall = 100%
## 15:41:37 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 15:41:42 Initializing from normalized Laplacian + noise (using irlba)
## 15:41:53 Commencing optimization for 200 epochs, with 2915880 positive edges
## 15:43:26 Optimization finished
# saveRDS(seu, file = "../rds/seu_8batches_RNAonly_soupxFiltered_DoubletFinderFiltered_clustered_subsetArchR.rds")
Clustering results
seu$stage <- factor(seu$stage, levels = c("epiboly-4ss","6-10ss","12-16ss","18-22ss"))
p1 <- DimPlot(seu, label = TRUE, reduction = "umap",cols = alpha(col.ls, 0.3))
p2 <- DimPlot(seu, reduction = "umap", group.by = "genotype",cols = alpha(genotype_cols, 0.4), pt.size = 0.5)
p3 <- DimPlot(seu, reduction = "umap", group.by = "stage", cols = alpha(stage_cols[c(4,6,8,10)], 0.4), pt.size = 0.5)
p1 + p2 + p3

DimPlot(seu, reduction = "umap", group.by = "genotype", cols = alpha(sample_cols, 0.4), pt.size = 0.1, split.by = "stage")

DimPlot(seu, reduction = "umap", group.by = "stage", cols = alpha(stage_cols[c(4,6,8,10)], 0.4), pt.size = 0.1, split.by = "genotype")

p1 <- DimPlot(seu, reduction = "umap", group.by = "sample", cols = alpha(sample_cols, 0.4), pt.size = 0.1)
p1

Expression of known marker genes
FeaturePlot(seu, features = c("foxd3", "foxd3-mCherry","foxd3-citrine","alg6"), pt.size = 0.1, ncol = 4)

known_markers <- c("foxd3", "sox10","pax3a","tfap2a",
"sox3","sox19b",
"mafba","egr2b","hoxb3a",
"tbxta","sox2","msgn1")
FeaturePlot(seu, features = c(known_markers), ncol = 6)

VlnPlot(seu, features = c(known_markers), ncol = 3, pt.size = 0)

VlnPlot(seu, features = c("krt4"), pt.size = 0.1)

FeaturePlot(seu, features = c("elavl3"), pt.size = 0.1)

Find new markers
nc.markers <- FindAllMarkers(seu, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 7
## Calculating cluster 8
## Calculating cluster 9
## Calculating cluster 10
## Calculating cluster 11
## Calculating cluster 12
## Calculating cluster 13
## Calculating cluster 14
## Calculating cluster 15
## Calculating cluster 16
## Calculating cluster 17
## Calculating cluster 18
## Calculating cluster 19
## Calculating cluster 20
## Calculating cluster 21
## Calculating cluster 22
## Calculating cluster 23
## Calculating cluster 24
## Calculating cluster 25
## Calculating cluster 26
## Calculating cluster 27
## Calculating cluster 28
nc.markers %>% group_by(cluster) %>% top_n(n = 2, wt = avg_log2FC) -> top2
top2
## # A tibble: 58 × 7
## # Groups: cluster [29]
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <chr>
## 1 0 3.08 0.901 0.246 0 0 pmp22a
## 2 0 1.79 0.963 0.48 0 0 rxraa
## 3 0 2.74 0.862 0.132 0 1 ncam1a
## 4 0 1.97 0.669 0.144 0 1 crb2a
## 5 0 2.50 0.982 0.533 0 2 cdon
## 6 0 2.32 0.619 0.107 0 2 alx4a
## 7 0 3.20 0.809 0.287 0 3 apoc1
## 8 0 3.14 0.796 0.08 0 3 samd10b
## 9 0 2.65 0.761 0.115 0 4 esrrga
## 10 0 2.65 0.964 0.339 0 4 kif26ab
## # … with 48 more rows
# top5 <- nc.markers %>% group_by(cluster) %>% top_n(n = 5, wt = avg_log2FC)
# DoHeatmap(nc_wt, features = (top5$gene))
# df <- data.frame(
# clusters = 0:22,
# names = c("NC","neural cells (ncam1a+)", "hindbrain NC (fn1b+)","NC neural","notochord", # 0to4
# "polster (low nfeature)","epidermis","cranial cartilage (col12a1b+)","cranial cartilage (col11a1b+)",# 5to8
# "mut-specific (lmx1bb+)","primordial germ cells","mesodermal","pigment", # 9to12
# "ciliated (dnah2)","sensory","periderm","skeletal mucsle","vascular tubulogenesis", # 13-17
# "polster","erythropoiesis","low nfeature","myeloid","retinal development" # 18-22
# )
# )
# seu$cell_types <- df$names[match(seu$seurat_clusters, df$clusters)]
#
# DimPlot(seu, label = TRUE, group.by = "cell_types",reduction = "umap")
saveRDS(seu, "/t1-data/project/tsslab/zhu/multiome/analysis_newref/clustering/rds/seuobj_rna/seu_RNAsoupx64340_clustered.rds")
write.csv(nc.markers, "/t1-data/project/tsslab/zhu/multiome/analysis_newref/clustering/results/seu_RNAsoupx64340_markers_20221226.csv")